# Load Data Cube Configuration
# (Alternative Google Earth Engine backend — kept for reference, not used here)
# from odc_gee import earthengine
# dc = earthengine.Datacube(app='Cloud_Statistics')
# Import Data Cube API
# import utils.data_cube_utilities.data_access_api as dc_api
# api = dc_api.DataAccessApi()
# Import Data Cube Utilities
import datacube
import sys, os
os.environ['USE_PYGEOS'] = '0'  # opt out of PyGEOS before geospatial imports (shapely 2.0 transition)
from dea_tools.plotting import rgb, display_map
from dea_tools.bandindices import calculate_indices
### EASI tools
sys.path.append(os.path.expanduser('../scripts'))  # make the local helper scripts importable
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils
easi = EasiDefaults() # Get the default parameters for this system
# Import Common Utilities
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
Successfully found configuration for deployment "eail"
# Start (or connect to) a Dask cluster for lazy, parallel data loading
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
# Show the cluster widget when one was created, otherwise fall back to the client
display(cluster if cluster else client)
# Print a dashboard URL reachable through the JupyterHub proxy
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))
4956af27
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 8 | Total memory: 29.00 GiB |
| Status: running | Using processes: True |
Scheduler-0149332c-8778-4872-a0af-449a4a037d32
| Comm: tcp://127.0.0.1:45479 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 8 |
| Started: Just now | Total memory: 29.00 GiB |
| Comm: tcp://127.0.0.1:45249 | Total threads: 2 |
| Dashboard: http://127.0.0.1:34399/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:33505 | |
| Local directory: /tmp/dask-worker-space/worker-naiqewr5 | |
| Comm: tcp://127.0.0.1:41895 | Total threads: 2 |
| Dashboard: http://127.0.0.1:39371/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:45281 | |
| Local directory: /tmp/dask-worker-space/worker-dhhu6waq | |
| Comm: tcp://127.0.0.1:33453 | Total threads: 2 |
| Dashboard: http://127.0.0.1:32949/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:33327 | |
| Local directory: /tmp/dask-worker-space/worker-rk1476l0 | |
| Comm: tcp://127.0.0.1:34571 | Total threads: 2 |
| Dashboard: http://127.0.0.1:36113/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:44493 | |
| Local directory: /tmp/dask-worker-space/worker-15vbnx45 | |
https://hub.eail.easi-eo.solutions/user/jhodge/proxy/8787/status
from datacube.utils.aws import configure_s3_access
# Sign S3 requests (aws_unsigned=False) and accept requester-pays charges for the
# data bucket; passing the Dask client propagates the credentials to the workers.
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client)
<botocore.credentials.Credentials at 0x7f5463d94790>
# Select a Product
product = "s2_l2a"
# Select a Latitude-Longitude point for the center of the analysis region
# Select the size of the box (in degrees) surrounding the center point
# Mombasa, Kenya
# lat_long = (-4.03, 39.62)
# box_size_deg = 0.15
# Calculate the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)
# Sydney Cricket Ground
# latitude = (-33.8951, -33.8902)
# longitude = (151.2219, 151.2276)
# Sydney, Australia
# latitude = (-34.039, -33.668)
# longitude = (150.867, 151.350)
# Suva, Fiji
# latitude = (-18.1725, -18.0492)
# longitude = (178.3881, 178.5190)
# An Giang Province - Vietnam
# Test Region for EY Data Challenge
# SMALL RICE CROP AREA #23
# lat_long = (10.404, 105.236)
# box_size_deg = 0.005
# Calculate the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)
# Default: use this deployment's example region from EasiDefaults
latitude = easi.latitude
longitude = easi.longitude
# Select a time range
# The inputs require a format (Min,Max) using this date format (YYYY-MM-DD)
# The Sentinel-2 allowable time range is: 2017-03-28 to current
time_extents = ('2018-01-01', '2018-12-31')
# Display the analysis region
# Click on the plot to get Lat-Lon coordinates to adjust the region
# Zoom in/out on the plot to move around the globe for other regions
display_map(longitude,latitude)
/env/lib/python3.10/site-packages/dea_tools/plotting.py:313: FutureWarning: This function is deprecated. See: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1
all_longitude, all_latitude = transform(Proj(crs), Proj('EPSG:4326'), all_x,
# Create a custom cloud coverage table here
def build_cloud_coverage_table_sentinel(product,platform,latitude,longitude,
time=None,dc=None,extra_band='green',extra_load_params={}):
load_params = dict(product=product,latitude=latitude,
longitude=longitude,group_by='solar_day',measurements=[extra_band,'SCL'],**extra_load_params)
if time is not None:
load_params["time"] = time
geo_data = dc.load(**load_params)
times = list(geo_data.time.values)
dates = [dt.astype('datetime64[D]') for dt in geo_data.time.values]
scene_slice_list = list(map(lambda t: geo_data.sel(time = t), times))
nodata_mask_list = (geo_data.SCL.values == 0)
cloud_mask_list = (geo_data.SCL.values == 1) | (geo_data.SCL.values == 3) | (geo_data.SCL.values == 8) | \
(geo_data.SCL.values == 9) | (geo_data.SCL.values == 10)
clean_mask_list = (~nodata_mask_list & ~cloud_mask_list)
clean_percent = [clean_mask.mean()*100 for clean_mask in clean_mask_list]
cloud_percent = [cloud_mask.mean()*100 for cloud_mask in cloud_mask_list]
nodata_percent = [nodata_mask.mean()*100 for nodata_mask in nodata_mask_list]
clean_count = list(map(np.sum, clean_mask_list))
total_count = list(map(np.sum, ~nodata_mask_list))
# data = {"Dates": dates,
# "clean_percentage": percentage_list,
# "clean_count": clean_pixel_count_list }
data = {"Date": dates,"Clean_percent": clean_percent,"Cloud_percent": cloud_percent,
"NoData_percent": nodata_percent,"Clean_count": clean_count,"Total_count": total_count}
return geo_data, pd.DataFrame(data=data, columns=list(data.keys()))
# Connect to the Open Data Cube index
dc = datacube.Datacube()
# Load the data and calculate the cloud coverage for each time slice
sentinel_dataset, coverage_table = build_cloud_coverage_table_sentinel(product = product,
platform = "SENTINEL-2",
latitude = latitude,
longitude = longitude,
time = time_extents,
dc = dc,
extra_band = 'green',
extra_load_params={
# EPSG:6933 is an equal-area projection, so per-scene pixel counts are comparable
'output_crs':'EPSG:6933',
# 10 m pixels; negative y resolution gives a north-up grid
'resolution': (-10,10),
'skip_broken_datasets': True,
# one Dask chunk per time slice so scenes can be processed independently
'dask_chunks': {'time':1}
})
/env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject(
This table displays data for each time slice in the cube (starting at index=0). The "Clean_percent" is the percentage of pixels WITHOUT clouds. So, low numbers indicate cloudy scenes and high numbers indicate clearer scenes. The "Clean_count" is the number of clear pixels in the scene and the "Total_count" is the total number of pixels in the scene.
Typically, there is a separation of 5 days between Sentinel-2 scenes for a single location. This assumes the availability of both missions (A and B), which is the case for most places in the world. When there is significant cloud cover, scenes may be missing from the time series due to issues with processing and geolocation.
# Show every row of the coverage table (pandas truncates long tables by default)
pd.set_option('display.max_rows', len(coverage_table))
coverage_table
| Date | Clean_percent | Cloud_percent | NoData_percent | Clean_count | Total_count | |
|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 97.832759 | 2.167241 | 0.000000 | 967746 | 989184 |
| 1 | 2018-01-06 | 43.218249 | 56.781751 | 0.000000 | 427508 | 989184 |
| 2 | 2018-01-11 | 0.000202 | 17.151410 | 82.848388 | 2 | 169661 |
| 3 | 2018-01-16 | 0.000000 | 17.151612 | 82.848388 | 0 | 169661 |
| 4 | 2018-01-21 | 98.990077 | 1.009923 | 0.000000 | 979194 | 989184 |
| 5 | 2018-01-26 | 98.729357 | 1.270643 | 0.000000 | 976615 | 989184 |
| 6 | 2018-01-31 | 97.376626 | 2.623374 | 0.000000 | 963234 | 989184 |
| 7 | 2018-02-05 | 97.557178 | 2.442822 | 0.000000 | 965020 | 989184 |
| 8 | 2018-02-15 | 0.000000 | 17.151612 | 82.848388 | 0 | 169661 |
| 9 | 2018-02-20 | 0.000000 | 100.000000 | 0.000000 | 0 | 989184 |
| 10 | 2018-02-25 | 7.855263 | 92.144737 | 0.000000 | 77703 | 989184 |
| 11 | 2018-03-02 | 68.838558 | 31.161442 | 0.000000 | 680940 | 989184 |
| 12 | 2018-03-22 | 17.080846 | 0.070765 | 82.848388 | 168961 | 169661 |
| 13 | 2018-03-27 | 0.000000 | 17.151612 | 82.848388 | 0 | 169661 |
| 14 | 2018-04-01 | 0.025374 | 17.126237 | 82.848388 | 251 | 169661 |
| 15 | 2018-04-06 | 97.599638 | 2.400362 | 0.000000 | 965440 | 989184 |
| 16 | 2018-04-11 | 95.933011 | 4.066989 | 0.000000 | 948954 | 989184 |
| 17 | 2018-04-16 | 0.028205 | 17.123407 | 82.848388 | 279 | 169661 |
| 18 | 2018-04-21 | 97.228423 | 2.771577 | 0.000000 | 961768 | 989184 |
| 19 | 2018-04-26 | 0.000000 | 100.000000 | 0.000000 | 0 | 989184 |
| 20 | 2018-05-01 | 98.966320 | 1.033680 | 0.000000 | 978959 | 989184 |
| 21 | 2018-05-11 | 98.773939 | 1.226061 | 0.000000 | 977056 | 989184 |
| 22 | 2018-05-16 | 8.271767 | 8.879844 | 82.848388 | 81823 | 169661 |
| 23 | 2018-05-21 | 0.000000 | 100.000000 | 0.000000 | 0 | 989184 |
| 24 | 2018-05-26 | 54.824279 | 45.175721 | 0.000000 | 542313 | 989184 |
| 25 | 2018-06-05 | 97.263603 | 2.735992 | 0.000404 | 962116 | 989180 |
| 26 | 2018-06-10 | 65.417354 | 34.582646 | 0.000000 | 647098 | 989184 |
| 27 | 2018-06-15 | 50.671463 | 49.328537 | 0.000000 | 501234 | 989184 |
| 28 | 2018-06-20 | 88.229490 | 11.770510 | 0.000000 | 872752 | 989184 |
| 29 | 2018-06-25 | 31.816224 | 68.183776 | 0.000000 | 314721 | 989184 |
| 30 | 2018-06-30 | 75.658523 | 24.341477 | 0.000000 | 748402 | 989184 |
| 31 | 2018-07-05 | 24.674479 | 75.325521 | 0.000000 | 244076 | 989184 |
| 32 | 2018-07-10 | 98.589140 | 1.410860 | 0.000000 | 975228 | 989184 |
| 33 | 2018-07-15 | 0.000000 | 17.151612 | 82.848388 | 0 | 169661 |
| 34 | 2018-07-20 | 10.446085 | 89.553915 | 0.000000 | 103331 | 989184 |
| 35 | 2018-07-30 | 0.054692 | 17.095707 | 82.849601 | 541 | 169649 |
| 36 | 2018-08-04 | 97.549495 | 2.450505 | 0.000000 | 964944 | 989184 |
| 37 | 2018-08-09 | 85.186073 | 14.813927 | 0.000000 | 842647 | 989184 |
| 38 | 2018-08-14 | 99.600378 | 0.399622 | 0.000000 | 985231 | 989184 |
| 39 | 2018-08-19 | 81.700068 | 18.299932 | 0.000000 | 808164 | 989184 |
| 40 | 2018-08-24 | 66.693456 | 33.306544 | 0.000000 | 659721 | 989184 |
| 41 | 2018-08-29 | 99.505552 | 0.494448 | 0.000000 | 984293 | 989184 |
| 42 | 2018-09-03 | 48.792540 | 51.207460 | 0.000000 | 482648 | 989184 |
| 43 | 2018-09-08 | 7.830292 | 92.169708 | 0.000000 | 77456 | 989184 |
| 44 | 2018-09-18 | 70.011949 | 29.988051 | 0.000000 | 692547 | 989184 |
| 45 | 2018-09-23 | 0.250914 | 16.900698 | 82.848388 | 2482 | 169661 |
| 46 | 2018-09-28 | 2.509948 | 14.641664 | 82.848388 | 24828 | 169661 |
| 47 | 2018-10-03 | 99.622113 | 0.377887 | 0.000000 | 985446 | 989184 |
| 48 | 2018-10-08 | 29.301323 | 70.698677 | 0.000000 | 289844 | 989184 |
| 49 | 2018-10-13 | 98.290510 | 1.709490 | 0.000000 | 972274 | 989184 |
| 50 | 2018-10-18 | 98.718439 | 1.281561 | 0.000000 | 976507 | 989184 |
| 51 | 2018-10-23 | 97.836702 | 2.163298 | 0.000000 | 967785 | 989184 |
| 52 | 2018-10-28 | 97.452446 | 2.547554 | 0.000000 | 963984 | 989184 |
| 53 | 2018-11-07 | 98.867450 | 1.132550 | 0.000000 | 977981 | 989184 |
| 54 | 2018-11-17 | 94.872137 | 5.127863 | 0.000000 | 938460 | 989184 |
| 55 | 2018-11-22 | 99.026875 | 0.973125 | 0.000000 | 979558 | 989184 |
| 56 | 2018-11-27 | 98.542435 | 1.457565 | 0.000000 | 974766 | 989184 |
| 57 | 2018-12-07 | 97.328909 | 2.671091 | 0.000000 | 962762 | 989184 |
| 58 | 2018-12-12 | 92.647475 | 7.352525 | 0.000000 | 916454 | 989184 |
| 59 | 2018-12-17 | 99.269095 | 0.730905 | 0.000000 | 981954 | 989184 |
| 60 | 2018-12-22 | 7.409339 | 92.590661 | 0.000000 | 73292 | 989184 |
| 61 | 2018-12-27 | 76.611732 | 23.388268 | 0.000000 | 757831 | 989184 |
# Plot the per-scene clean-pixel percentage over time as blue dots.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(coverage_table["Date"].values, coverage_table["Clean_percent"].values, 'bo', markersize=8)
ax.set_ylim(0, 105)
plt.show()
# Load the data to create an RGB image
# 'like=' reuses the grid (extent, CRS, resolution) of the earlier load so the
# bands align with the cloud statistics; dask_chunks keeps the load lazy.
sentinel_dataset = dc.load(like=sentinel_dataset,
product='s2_l2a',
measurements = ['red', 'green', 'blue', 'nir', 'swir_1', 'swir_2'],
dask_chunks={'time':1})
# Select one of the time slices and create an RGB image.
# Time slices are numbered from 0 to x and shown in the table above
# Review the clean_percentage values above to select scenes with few clouds
# Clouds will be visible in WHITE for an RGB image
# RGB image options
# True-Color RGB = Red, Green, Blue
# False Color RGB (Mosaic) = SWIR2, NIR, Green
# Index of the time slice to display — pick one with a high Clean_percent from
# the coverage table above. (Renamed from `slice`, which shadowed the builtin.)
time_index = 0
fig, ax = plt.subplots(1, 2, figsize=(16, 8))
# True colour uses red/green/blue; false colour uses swir_2/nir/green.
true_rgb = sentinel_dataset.isel(time=time_index)[['red', 'green', 'blue']].to_array()
false_rgb = sentinel_dataset.isel(time=time_index)[['swir_2', 'nir', 'green']].to_array()
# Fixed vmin/vmax stretch the band values for display (clouds appear white).
true_rgb.plot.imshow(ax=ax[0], vmin=0, vmax=2000)
false_rgb.plot.imshow(ax=ax[1], vmin=0, vmax=5000)
ax[0].set_title('True Color'), ax[0].xaxis.set_visible(False), ax[0].yaxis.set_visible(False)
ax[1].set_title('False Color'), ax[1].xaxis.set_visible(False), ax[1].yaxis.set_visible(False)
plt.show()